# Read the Titanic records from the CSV file in the working directory.
titanic <- read.csv(file = "Titanic.csv")
# Print the first five rows as a quick check of the columns that were read.
head(titanic, n = 5)
## Class Survived Name Sex Age
## 1 1 1 Allen, Miss. Elisabeth Walton female 29.0000
## 2 1 1 Allison, Master. Hudson Trevor male 0.9167
## 3 1 0 Allison, Miss. Helen Loraine female 2.0000
## 4 1 0 Allison, Mr. Hudson Joshua Creighton male 30.0000
## 5 1 0 Allison, Mrs. Hudson J C (Bessie Waldo Daniels) female 25.0000
## Dept SibSp ParCh Ticket Fare Cabin Embarked Job
## 1 passenger 0 0 24160 211.3375 B5 S
## 2 passenger 1 2 113781 151.55 C22 C26 S
## 3 passenger 1 2 113781 151.55 C22 C26 S
## 4 passenger 1 2 113781 151.55 C22 C26 S
## 5 passenger 1 2 113781 151.55 C22 C26 S
# Keep only rows where "Survived" is present; the survival rates below are
# computed with mean(Survived), which would return NA if any value were missing.
titanic <- filter(titanic, !is.na(Survived))
# Histogram of Age in 5-year bins.
# Rows with a missing Age cannot be binned. na.rm = TRUE drops them explicitly
# instead of relying on stat_bin() to drop them with a warning (34 such rows
# were reported in the previously recorded run).
ggplot(data = titanic, aes(x = Age)) +
  geom_histogram(
    binwidth = 5,
    fill = "skyblue",
    color = "black",
    na.rm = TRUE
  ) +
  labs(x = "Age", y = "Count") +
  ggtitle("Age Distribution of Passengers")
# Survival rate (mean of Survived) per age band.
# Rows without an Age are removed first: cut() returns NA for them, and they
# would otherwise be summarised and drawn as an extra "NA" bar.
titanic %>%
  filter(!is.na(Age)) %>%
  mutate(
    Age_Group = cut(
      Age,
      breaks = c(0, 18, 30, 50, Inf),
      labels = c("0-18", "19-30", "31-50", "51+"),
      # Close the first interval on the left so an age of exactly 0 lands in
      # "0-18" instead of becoming NA.
      include.lowest = TRUE
    )
  ) %>%
  group_by(Age_Group) %>%
  summarise(Survival_Rate = mean(Survived)) %>%
  ggplot(aes(x = Age_Group, y = Survival_Rate)) +
  # geom_col() is the direct form of geom_bar(stat = "identity").
  geom_col(fill = "skyblue", color = "black") +
  labs(x = "Age Group", y = "Survival Rate") +
  ggtitle("Survival Rate by Age Group")
# Number of rows for each value of Survived, drawn as bars.
# factor() makes the x axis discrete, one bar per distinct value.
titanic %>%
  ggplot(aes(x = factor(Survived))) +
  geom_bar() +
  xlab("Survived") +
  ylab("Count")
# Mean of Survived within each Sex, used as the survival rate.
survival_by_sex <- summarise(
  group_by(titanic, Sex),
  survival_rate = mean(Survived)
)
# One bar per Sex, with the survival rate as the bar height.
survival_by_sex %>%
  ggplot(aes(x = Sex, y = survival_rate)) +
  geom_col() +
  labs(x = "Sex", y = "Survival Rate")
# Mean of Survived within each Class, used as the survival rate.
# (The grouping column in this data set is named "Class".)
survival_by_class <- summarise(
  group_by(titanic, Class),
  survival_rate = mean(Survived)
)
# One bar per Class; factor() makes the x axis discrete.
survival_by_class %>%
  ggplot(aes(x = factor(Class), y = survival_rate)) +
  geom_col() +
  labs(x = "Class", y = "Survival Rate")
# Styled bar plot of survival rate by Sex.
# Reuses survival_by_sex as computed earlier in the script: `titanic` is not
# modified in between, so recomputing the identical summary here was redundant.
ggplot(data = survival_by_sex, aes(x = Sex, y = survival_rate)) +
  geom_col(fill = "skyblue", color = "black") +
  labs(x = "Sex", y = "Survival Rate") +
  ggtitle("Survival Rate by Sex")
# Row counts per Class, with the bar fill showing Survived and one panel
# per Sex.
# A stray lone backtick used to follow this plot; it opened an unterminated
# quoted name and made the rest of the script fail to parse, so it is removed.
ggplot(data = titanic, aes(x = factor(Class), fill = factor(Survived))) +
  geom_bar() +
  facet_wrap(~Sex) +
  labs(x = "Class", y = "Count", fill = "Survived") +
  ggtitle("Survival Count by Class and Sex")
# Survival rate (mean of Survived) at each distinct Age value.
# Rows with a missing Age are dropped before grouping; otherwise they form an
# NA group that geom_line() cannot place and removes with a warning.
survival_by_age <- titanic %>%
  filter(!is.na(Age)) %>%
  group_by(Age) %>%
  summarise(survival_rate = mean(Survived))
# Line plot of survival rate by Age
ggplot(data = survival_by_age, aes(x = Age, y = survival_rate)) +
  geom_line() +
  labs(x = "Age", y = "Survival Rate")
# Load necessary libraries
library(plotly)
# Scatter plot of Fare vs. Age colored by Survived.
# NOTE(review): the recorded head() output prints Fare with differing numbers
# of decimals (211.3375 vs 151.55), which suggests read.csv() parsed the
# column as text rather than numbers -- confirm with str(titanic). A text
# column would be drawn on a discrete y axis, so Fare is coerced to numeric
# here. The coercion is skipped entirely when Fare is already numeric.
interact <- titanic %>%
  mutate(
    Fare = if (is.numeric(Fare)) {
      Fare
    } else {
      # as.character() first so a factor is converted by its labels, not by
      # its internal integer codes.
      as.numeric(as.character(Fare))
    }
  ) %>%
  ggplot(aes(x = Age, y = Fare, color = factor(Survived))) +
  geom_point() +
  labs(x = "Age", y = "Fare", color = "Survived") +
  ggtitle("Fare vs. Age Colored by Survival")
# Convert ggplot to plotly
# NOTE(review): this variable shares its name with base::interactive(); the
# name is kept so any later references keep working.
interactive <- ggplotly(interact)
# Show interactive plot
interactive
# Calculate survival rate (mean of Survived) by Sex, Class, and Age
survival_by_group <- titanic %>%
  group_by(Sex, Class, Age) %>%
  # .groups = "drop" returns an ungrouped result and avoids the
  # "summarise() has grouped output by ..." message.
  summarise(survival_rate = mean(Survived), .groups = "drop")
# Create an interactive bar plot using plotly: one bar per observed
# Sex/Class/Age combination.
plotly_bar_plot <- ggplot(
  data = survival_by_group,
  aes(x = interaction(Sex, Class, Age), y = survival_rate)
) +
  # geom_col() is the direct form of geom_bar(stat = "identity").
  geom_col(fill = "skyblue", color = "black") +
  labs(x = "Sex, Class, Age", y = "Survival Rate") +
  ggtitle("Survival Rate by Sex, Class, and Age") +
  # Apply the complete theme first: theme_minimal() replaces every theme
  # element, so the axis-text rotation has to come after it to take effect.
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
# Convert the ggplot object to a plotly widget and display it.
plotly_bar_plot <- ggplotly(plotly_bar_plot)
plotly_bar_plot